package com.shujia.core.transformations

import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demo driver for the RDD `sample` transformation: reads a text file,
 * draws a random sample of its lines and prints the sampled lines.
 */
object SampleOpt {

  /**
   * Runs the sampling demo on a local Spark master.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setMaster("local")
      // The app name now matches the operator being demonstrated
      // (it was previously labelled as the filter demo).
      .setAppName("sample算子操作")

    val sc = new SparkContext(conf)

    // Always release the context, even when the job fails
    // (e.g. the input file is missing), so the local Spark runtime is shut down.
    try {
      val lineRDD: RDD[String] = sc.textFile("spark/data/students.txt")

      /**
       * sample: draws a random sample of the RDD. The size of the result is
       * not guaranteed to be exactly `fraction` of the input, only close to it.
       * With `withReplacement = true` the same line may be picked more than
       * once, and `fraction` is the expected number of times each line is chosen.
       * No seed is passed, so every run may produce a different sample.
       */
      val sampleResRDD: RDD[String] = lineRDD.sample(withReplacement = true, fraction = 0.1)

      // foreach is an action: it triggers the job and prints each sampled line.
      sampleResRDD.foreach(println)
    } finally {
      sc.stop()
    }
  }
}
